caaspp_scores_path <- "data/sb_ca2023_1_csv_v1.txt"
caaspp <- read_delim(caaspp_scores_path, delim="^")
## Rows: 102489 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "^"
## chr (27): County Code, District Code, School Code, Test Type, Total Tested a...
## dbl (5): Test Year, Student Group ID, Grade, Test ID, Type ID
## lgl (1): Filler
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
colnames(caaspp) <- c(
"county_code",
"district_code",
"school_code",
"filler",
"test_year",
"student_group_id",
"test_type",
"total_tested_reporting_level",
"total_tested_with_scores",
"grade",
"test_id",
"students_enrolled",
"students_tested",
"mean_scale_score",
"pct_standard_exceeded",
"pct_standard_met",
"pct_standard_met_and_above",
"pct_standard_nearly_met",
"pct_standard_not_met",
"students_with_scores",
"area1_pct_above_standard",
"area1_pct_near_standard",
"area1_pct_below_standard",
"area2_pct_above_standard",
"area2_pct_near_standard",
"area2_pct_below_standard",
"area3_pct_above_standard",
"area3_pct_near_standard",
"area3_pct_below_standard",
"area4_pct_above_standard",
"area4_pct_near_standard",
"area4_pct_below_standard",
"type_id"
)
lausd_caaspp <- caaspp %>%
filter(district_code=="64733") %>%
inner_join(frpm_lausd, by="school_code") %>%
filter(student_group_id == 1) %>% # All Students only
mutate(
test_subject = case_when(
test_id == 1 ~ "ELA",
test_id == 2 ~ "Math",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(test_subject)) # Keep only ELA and Math
# 2. Convert test scores to numeric (some may be "*")
lausd_caaspp_numeric <- lausd_caaspp %>%
mutate(across(
c(mean_scale_score,
pct_standard_met_and_above,
pct_standard_not_met),
~ as.numeric(na_if(., "*"))
))
# 3. Aggregate to school-subject-year level
school_scores <- lausd_caaspp_numeric %>%
group_by(school_code, test_year, test_subject) %>%
summarise(
avg_pct_met_above = mean(pct_standard_met_and_above, na.rm = TRUE),
avg_pct_not_met = mean(pct_standard_not_met, na.rm = TRUE),
avg_scale_score = mean(mean_scale_score, na.rm = TRUE),
.groups = "drop"
) %>%
pivot_wider(
names_from = test_subject,
values_from = c(avg_pct_met_above, avg_pct_not_met, avg_scale_score),
names_glue = "{.value}_{test_subject}"
)
glimpse(school_scores)
## Rows: 969
## Columns: 8
## $ school_code <chr> "0100289", "0100669", "0100677", "0100743", "01…
## $ test_year <dbl> 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023,…
## $ avg_pct_met_above_ELA <dbl> 28.42250, 34.91800, 64.20000, 43.83400, 45.7400…
## $ avg_pct_met_above_Math <dbl> 28.4725, 14.6680, 34.5700, 31.5940, 26.8800, 27…
## $ avg_pct_not_met_ELA <dbl> 47.72000, 42.64000, 17.28000, 26.84200, 29.7900…
## $ avg_pct_not_met_Math <dbl> 38.26500, 60.04200, 37.04000, 37.50200, 55.9100…
## $ avg_scale_score_ELA <dbl> 2409.033, 2492.700, 2610.000, 2473.325, 2565.90…
## $ avg_scale_score_Math <dbl> 2438.567, 2457.350, 2582.900, 2459.500, 2536.60…
##
chronic_absenteeism ~ frpm_percent @ 35
Estimate (tau): 6.478
SE: 4.216
95% CI: [-1.784, 14.741]
p-value: 0.124
Call: rdplot
Number of Obs. 1001 Kernel Uniform
Number of Obs. 68 933 Eff. Number of Obs. 68 933 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000
##
btb ~ frpm_percent @ 35
Estimate (tau): -0.127
SE: 0.162
95% CI: [-0.445, 0.190]
p-value: 0.432
Call: rdplot
Number of Obs. 1001 Kernel Uniform
Number of Obs. 68 933 Eff. Number of Obs. 68 933 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000
##
avg_pct_met_above_ELA ~ frpm_percent @ 35
Estimate (tau): -12.850
SE: 6.001
95% CI: [-24.612, -1.088]
p-value: 0.032
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 66 894 Eff. Number of Obs. 66 894 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000
##
avg_pct_met_above_Math ~ frpm_percent @ 35
Estimate (tau): -15.069
SE: 8.699
95% CI: [-32.120, 1.981]
p-value: 0.083
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 66 894 Eff. Number of Obs. 66 894 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000
##
avg_pct_not_met_ELA ~ frpm_percent @ 35
Estimate (tau): 9.376
SE: 4.539
95% CI: [0.480, 18.271]
p-value: 0.039
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 66 894 Eff. Number of Obs. 66 894 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000
##
avg_pct_not_met_Math ~ frpm_percent @ 35
Estimate (tau): 11.277
SE: 5.869
95% CI: [-0.226, 22.780]
p-value: 0.055
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 66 894 Eff. Number of Obs. 66 894 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000
##
avg_scale_score_ELA ~ frpm_percent @ 35
Estimate (tau): -24.406
SE: 25.897
95% CI: [-75.164, 26.351]
p-value: 0.346
Call: rdplot
Number of Obs. 958 Kernel Uniform
Number of Obs. 66 892 Eff. Number of Obs. 66 892 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000
##
avg_scale_score_Math ~ frpm_percent @ 35
Estimate (tau): -32.251
SE: 25.797
95% CI: [-82.812, 18.310]
p-value: 0.211
Call: rdplot
Number of Obs. 958 Kernel Uniform
Number of Obs. 66 892 Eff. Number of Obs. 66 892 Order poly. fit (p) 4 4 BW poly. fit (h) 35.000 65.000 Number of bins scale 1.000 1.000
##
chronic_absenteeism ~ frpm_percent @ 75
Estimate (tau): 1.488
SE: 4.687
95% CI: [-7.698, 10.674]
p-value: 0.751
Call: rdplot
Number of Obs. 1001 Kernel Uniform
Number of Obs. 235 766 Eff. Number of Obs. 235 766 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
btb ~ frpm_percent @ 75
Estimate (tau): -0.319
SE: 0.173
95% CI: [-0.658, 0.020]
p-value: 0.065
Call: rdplot
Number of Obs. 1001 Kernel Uniform
Number of Obs. 235 766 Eff. Number of Obs. 235 766 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
avg_pct_met_above_ELA ~ frpm_percent @ 75
Estimate (tau): 8.095
SE: 4.953
95% CI: [-1.612, 17.802]
p-value: 0.102
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 227 733 Eff. Number of Obs. 227 733 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
avg_pct_met_above_Math ~ frpm_percent @ 75
Estimate (tau): 3.355
SE: 6.315
95% CI: [-9.022, 15.731]
p-value: 0.595
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 227 733 Eff. Number of Obs. 227 733 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
avg_pct_not_met_ELA ~ frpm_percent @ 75
Estimate (tau): -11.123
SE: 4.753
95% CI: [-20.438, -1.808]
p-value: 0.019
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 227 733 Eff. Number of Obs. 227 733 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
avg_pct_not_met_Math ~ frpm_percent @ 75
Estimate (tau): -8.667
SE: 7.271
95% CI: [-22.918, 5.584]
p-value: 0.233
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 227 733 Eff. Number of Obs. 227 733 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
avg_scale_score_ELA ~ frpm_percent @ 75
Estimate (tau): 32.740
SE: 17.269
95% CI: [-1.106, 66.586]
p-value: 0.058
Call: rdplot
Number of Obs. 958 Kernel Uniform
Number of Obs. 227 731 Eff. Number of Obs. 227 731 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
avg_scale_score_Math ~ frpm_percent @ 75
Estimate (tau): 22.756
SE: 12.822
95% CI: [-2.375, 47.888]
p-value: 0.076
Call: rdplot
Number of Obs. 958 Kernel Uniform
Number of Obs. 227 731 Eff. Number of Obs. 227 731 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
chronic_absenteeism ~ undup_pct @ 55
Estimate (tau): -0.188
SE: 3.485
95% CI: [-7.019, 6.643]
p-value: 0.957
Call: rdplot
Number of Obs. 1001 Kernel Uniform
Number of Obs. 114 887 Eff. Number of Obs. 114 887 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000
##
btb ~ undup_pct @ 55
Estimate (tau): 0.110
SE: 0.224
95% CI: [-0.330, 0.550]
p-value: 0.623
Call: rdplot
Number of Obs. 1001 Kernel Uniform
Number of Obs. 114 887 Eff. Number of Obs. 114 887 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000
##
avg_pct_met_above_ELA ~ undup_pct @ 55
Estimate (tau): -0.753
SE: 4.683
95% CI: [-9.932, 8.426]
p-value: 0.872
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 112 848 Eff. Number of Obs. 112 848 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000
##
avg_pct_met_above_Math ~ undup_pct @ 55
Estimate (tau): 4.277
SE: 7.507
95% CI: [-10.437, 18.990]
p-value: 0.569
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 112 848 Eff. Number of Obs. 112 848 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000
##
avg_pct_not_met_ELA ~ undup_pct @ 55
Estimate (tau): 2.401
SE: 3.469
95% CI: [-4.399, 9.201]
p-value: 0.489
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 112 848 Eff. Number of Obs. 112 848 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000
##
avg_pct_not_met_Math ~ undup_pct @ 55
Estimate (tau): -2.151
SE: 6.753
95% CI: [-15.386, 11.084]
p-value: 0.750
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 112 848 Eff. Number of Obs. 112 848 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000
##
avg_scale_score_ELA ~ undup_pct @ 55
Estimate (tau): 13.012
SE: 28.723
95% CI: [-43.284, 69.307]
p-value: 0.651
Call: rdplot
Number of Obs. 958 Kernel Uniform
Number of Obs. 112 846 Eff. Number of Obs. 112 846 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000
##
avg_scale_score_Math ~ undup_pct @ 55
Estimate (tau): 2.480
SE: 18.709
95% CI: [-34.189, 39.148]
p-value: 0.895
Call: rdplot
Number of Obs. 958 Kernel Uniform
Number of Obs. 112 846 Eff. Number of Obs. 112 846 Order poly. fit (p) 4 4 BW poly. fit (h) 55.000 45.000 Number of bins scale 1.000 1.000
##
chronic_absenteeism ~ undup_pct @ 75
Estimate (tau): -1.684
SE: 4.928
95% CI: [-11.342, 7.974]
p-value: 0.733
Call: rdplot
Number of Obs. 1001 Kernel Uniform
Number of Obs. 214 787 Eff. Number of Obs. 214 787 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
btb ~ undup_pct @ 75
Estimate (tau): -0.007
SE: 0.174
95% CI: [-0.348, 0.334]
p-value: 0.970
Call: rdplot
Number of Obs. 1001 Kernel Uniform
Number of Obs. 214 787 Eff. Number of Obs. 214 787 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
avg_pct_met_above_ELA ~ undup_pct @ 75
Estimate (tau): 2.113
SE: 4.545
95% CI: [-6.795, 11.021]
p-value: 0.642
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 207 753 Eff. Number of Obs. 207 753 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
avg_pct_met_above_Math ~ undup_pct @ 75
Estimate (tau): 4.140
SE: 5.419
95% CI: [-6.481, 14.760]
p-value: 0.445
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 207 753 Eff. Number of Obs. 207 753 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
avg_pct_not_met_ELA ~ undup_pct @ 75
Estimate (tau): -2.009
SE: 4.585
95% CI: [-10.996, 6.978]
p-value: 0.661
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 207 753 Eff. Number of Obs. 207 753 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
avg_pct_not_met_Math ~ undup_pct @ 75
Estimate (tau): -3.593
SE: 5.798
95% CI: [-14.957, 7.772]
p-value: 0.536
Call: rdplot
Number of Obs. 960 Kernel Uniform
Number of Obs. 207 753 Eff. Number of Obs. 207 753 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
avg_scale_score_ELA ~ undup_pct @ 75
Estimate (tau): 2.250
SE: 23.602
95% CI: [-44.009, 48.510]
p-value: 0.924
Call: rdplot
Number of Obs. 958 Kernel Uniform
Number of Obs. 207 751 Eff. Number of Obs. 207 751 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
##
avg_scale_score_Math ~ undup_pct @ 75
Estimate (tau): 3.898
SE: 17.315
95% CI: [-30.038, 37.834]
p-value: 0.822
Call: rdplot
Number of Obs. 958 Kernel Uniform
Number of Obs. 207 751 Eff. Number of Obs. 207 751 Order poly. fit (p) 4 4 BW poly. fit (h) 75.000 25.000 Number of bins scale 1.000 1.000
outcome running_variable cutoff tau se ci_lower ci_upper p_value
######################################################
# GP RDD
# chronic_abseentism ~ FRPM % - 35 cut off
######################################################
rdd_res_absenteeism_frpm_35_cutoff <- gp_rdd(
df_clean$frpm_percent,
df_clean$chronic_absenteeism,
35
)
rdd_res_absenteeism_frpm_35_cutoff$tau # estimated effect
## [1] 7.865098
rdd_res_absenteeism_frpm_35_cutoff$se # standard error
## [1] 5.739285
rdd_res_absenteeism_frpm_35_cutoff$ci # confidence interval
## lower upper
## -3.383694 19.113891
rdd_result_plot_1 <- gp_rdd_plot(rdd_res_absenteeism_frpm_35_cutoff) +
geom_vline(xintercept = 35, linetype = "dashed") +
coord_cartesian(xlim = c(20, 60)) +
labs(title = "Zoomed-In View Around the Cutoff")
print(rdd_result_plot_1)
######################################################
# GP RDD
# chronic_abseentism ~ FRPM % - 75 cut off
######################################################
# Example using formula interface:
rdd_res_absenteeism_frpm_75_cutoff <- gp_rdd(
df_clean$frpm_percent,
df_clean$chronic_absenteeism,
75
)
rdd_res_absenteeism_frpm_75_cutoff$tau # estimated effect
## [1] -0.07402481
rdd_res_absenteeism_frpm_75_cutoff$se # standard error
## [1] 6.059793
rdd_res_absenteeism_frpm_75_cutoff$ci # confidence interval
## lower upper
## -11.95100 11.80295
rdd_result_plot_2 <- gp_rdd_plot(rdd_res_absenteeism_frpm_75_cutoff) +
geom_vline(xintercept = 35, linetype = "dashed") +
coord_cartesian(xlim = c(20, 60)) +
labs(title = "Zoomed-In View Around the Cutoff")
print(rdd_result_plot_2)
######################################################
# GP RDD
# BTB ~ FRPM % - 35 cut off
######################################################
rdd_res_absenteeism_BTB_35_cutoff <- gp_rdd(
df_clean$frpm_percent,
df_clean$btb,
35
)
rdd_res_absenteeism_BTB_35_cutoff$tau # estimated effect
## [1] -0.2305462
rdd_res_absenteeism_BTB_35_cutoff$se # standard error
## [1] 0.2237574
rdd_res_absenteeism_BTB_35_cutoff$ci # confidence interval
## lower upper
## -0.6691026 0.2080103
rdd_result_plot_3 <- gp_rdd_plot(rdd_res_absenteeism_BTB_35_cutoff) +
geom_vline(xintercept = 35, linetype = "dashed") +
coord_cartesian(xlim = c(20, 60)) +
labs(title = "Zoomed-In View Around the Cutoff")
print(rdd_result_plot_3)
######################################################
# GP RDD
# BTB ~ FRPM % - 75 cut off
######################################################
rdd_res_absenteeism_BTB_75_cutoff <- gp_rdd(
df_clean$frpm_percent,
df_clean$btb,
75
)
rdd_res_absenteeism_BTB_75_cutoff$tau # estimated effect
## [1] -0.1589799
rdd_res_absenteeism_BTB_75_cutoff$se # standard error
## [1] 0.2079251
rdd_res_absenteeism_BTB_75_cutoff$ci # confidence interval
## lower upper
## -0.5665055 0.2485458
rdd_result_plot_4 <- gp_rdd_plot(rdd_res_absenteeism_BTB_75_cutoff) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(20, 60)) +
labs(title = "Zoomed-In View Around the Cutoff")
print(rdd_result_plot_4)
rdd_res_absenteeism_undup_55_cutoff <- gp_rdd(
df_clean$undup_pct,
df_clean$chronic_absenteeism,
55
)
rdd_res_absenteeism_undup_55_cutoff$tau
## [1] 3.060495
rdd_res_absenteeism_undup_55_cutoff$se
## [1] 7.58411
rdd_res_absenteeism_undup_55_cutoff$ci
## lower upper
## -11.80409 17.92508
rdd_plot_undup_55_absent <- gp_rdd_plot(rdd_res_absenteeism_undup_55_cutoff) +
geom_vline(xintercept = 55, linetype = "dashed") +
coord_cartesian(xlim = c(30, 80)) +
labs(title = "Chronic Absenteeism ~ Unduplicated % (55% Cutoff)")
print(rdd_plot_undup_55_absent)
# GP RDD - chronic_abseentism ~ Unduplicated Pupil % - 75% cut off
rdd_res_absenteeism_undup_75_cutoff <- gp_rdd(
df_clean$undup_pct,
df_clean$chronic_absenteeism,
75
)
rdd_res_absenteeism_undup_75_cutoff$tau
## [1] -1.667907
rdd_res_absenteeism_undup_75_cutoff$se
## [1] 6.675245
rdd_res_absenteeism_undup_75_cutoff$ci
## lower upper
## -14.75115 11.41533
rdd_plot_undup_75_absent <- gp_rdd_plot(rdd_res_absenteeism_undup_75_cutoff) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "Chronic Absenteeism ~ Unduplicated % (75% Cutoff)")
print(rdd_plot_undup_75_absent)
rdd_res_btb_undup_55_cutoff <- gp_rdd(
df_clean$undup_pct,
df_clean$btb,
55
)
rdd_res_btb_undup_55_cutoff$tau
## [1] -0.02215456
rdd_res_btb_undup_55_cutoff$se
## [1] 0.2609895
rdd_res_btb_undup_55_cutoff$ci
## lower upper
## -0.5336846 0.4893755
rdd_plot_undup_55_btb <- gp_rdd_plot(rdd_res_btb_undup_55_cutoff) +
geom_vline(xintercept = 55, linetype = "dashed") +
coord_cartesian(xlim = c(30, 80)) +
labs(title = "BTB Participation ~ Unduplicated % (55% Cutoff)")
print(rdd_plot_undup_55_btb)
rdd_res_btb_undup_75_cutoff <- gp_rdd(
df_clean$undup_pct,
df_clean$btb,
75
)
rdd_res_btb_undup_75_cutoff$tau
## [1] 0.094369
rdd_res_btb_undup_75_cutoff$se
## [1] 0.2249487
rdd_res_btb_undup_75_cutoff$ci
## lower upper
## -0.3465223 0.5352603
rdd_plot_undup_75_btb <- gp_rdd_plot(rdd_res_btb_undup_75_cutoff) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "BTB Participation ~ Unduplicated % (75% Cutoff)")
print(rdd_plot_undup_75_btb)
rdd_avg_pct_met_above_ela_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_met_above_ELA, 35)
rdd_avg_pct_met_above_ela_frpm_35$tau
## [1] -14.90419
rdd_avg_pct_met_above_ela_frpm_35$se
## [1] 6.742368
rdd_avg_pct_met_above_ela_frpm_35$ci
## lower upper
## -28.118984 -1.689388
rdd_plot_avg_pct_met_above_ela_frpm_35 <- gp_rdd_plot(rdd_avg_pct_met_above_ela_frpm_35) +
geom_vline(xintercept = 35, linetype = "dashed") +
coord_cartesian(xlim = c(20, 60)) +
labs(title = "% MetAbove ELA ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_pct_met_above_ela_frpm_35)
rdd_avg_pct_met_above_ela_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_met_above_ELA, 75)
rdd_avg_pct_met_above_ela_frpm_75$tau
## [1] 1.103441
rdd_avg_pct_met_above_ela_frpm_75$se
## [1] 6.080521
rdd_avg_pct_met_above_ela_frpm_75$ci
## lower upper
## -10.81416 13.02104
rdd_plot_avg_pct_met_above_ela_frpm_75 <- gp_rdd_plot(rdd_avg_pct_met_above_ela_frpm_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "% MetAbove ELA ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_pct_met_above_ela_frpm_75)
rdd_avg_pct_met_above_ela_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_met_above_ELA, 55)
rdd_avg_pct_met_above_ela_undup_55$tau
## [1] -6.078043
rdd_avg_pct_met_above_ela_undup_55$se
## [1] 7.770916
rdd_avg_pct_met_above_ela_undup_55$ci
## lower upper
## -21.308759 9.152673
rdd_plot_avg_pct_met_above_ela_undup_55 <- gp_rdd_plot(rdd_avg_pct_met_above_ela_undup_55) +
geom_vline(xintercept = 55, linetype = "dashed") +
coord_cartesian(xlim = c(30, 80)) +
labs(title = "% MetAbove ELA ~ Undup (55% Cutoff)")
print(rdd_plot_avg_pct_met_above_ela_undup_55)
rdd_avg_pct_met_above_ela_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_met_above_ELA, 75)
rdd_avg_pct_met_above_ela_undup_75$tau
## [1] -5.278574
rdd_avg_pct_met_above_ela_undup_75$se
## [1] 6.61937
rdd_avg_pct_met_above_ela_undup_75$ci
## lower upper
## -18.252301 7.695153
rdd_plot_avg_pct_met_above_ela_undup_75 <- gp_rdd_plot(rdd_avg_pct_met_above_ela_undup_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "% MetAbove ELA ~ Undup (75% Cutoff)")
print(rdd_plot_avg_pct_met_above_ela_undup_75)
rdd_avg_pct_met_above_math_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_met_above_Math, 35)
rdd_avg_pct_met_above_math_frpm_35$tau
## [1] -18.51735
rdd_avg_pct_met_above_math_frpm_35$se
## [1] 7.425478
rdd_avg_pct_met_above_math_frpm_35$ci
## lower upper
## -33.071017 -3.963677
rdd_plot_avg_pct_met_above_math_frpm_35 <- gp_rdd_plot(rdd_avg_pct_met_above_math_frpm_35) +
geom_vline(xintercept = 35, linetype = "dashed") +
coord_cartesian(xlim = c(20, 60)) +
labs(title = "% MetAbove Math ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_pct_met_above_math_frpm_35)
rdd_avg_pct_met_above_math_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_met_above_Math, 75)
rdd_avg_pct_met_above_math_frpm_75$tau
## [1] -0.3246283
rdd_avg_pct_met_above_math_frpm_75$se
## [1] 5.969782
rdd_avg_pct_met_above_math_frpm_75$ci
## lower upper
## -12.02519 11.37593
rdd_plot_avg_pct_met_above_math_frpm_75 <- gp_rdd_plot(rdd_avg_pct_met_above_math_frpm_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "% MetAbove Math ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_pct_met_above_math_frpm_75)
rdd_avg_pct_met_above_math_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_met_above_Math, 55)
rdd_avg_pct_met_above_math_undup_55$tau
## [1] -4.556328
rdd_avg_pct_met_above_math_undup_55$se
## [1] 7.710299
rdd_avg_pct_met_above_math_undup_55$ci
## lower upper
## -19.66824 10.55558
rdd_plot_avg_pct_met_above_math_undup_55 <- gp_rdd_plot(rdd_avg_pct_met_above_math_undup_55) +
geom_vline(xintercept = 55, linetype = "dashed") +
coord_cartesian(xlim = c(30, 80)) +
labs(title = "% MetAbove Math ~ Undup (55% Cutoff)")
print(rdd_plot_avg_pct_met_above_math_undup_55)
rdd_avg_pct_met_above_math_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_met_above_Math, 75)
rdd_avg_pct_met_above_math_undup_75$tau
## [1] -1.108402
rdd_avg_pct_met_above_math_undup_75$se
## [1] 6.384361
rdd_avg_pct_met_above_math_undup_75$ci
## lower upper
## -13.62152 11.40471
rdd_plot_avg_pct_met_above_math_undup_75 <- gp_rdd_plot(rdd_avg_pct_met_above_math_undup_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "% MetAbove Math ~ Undup (75% Cutoff)")
print(rdd_plot_avg_pct_met_above_math_undup_75)
rdd_avg_pct_not_met_ela_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_not_met_ELA, 35)
rdd_avg_pct_not_met_ela_frpm_35$tau
## [1] 11.0354
rdd_avg_pct_not_met_ela_frpm_35$se
## [1] 5.596307
rdd_avg_pct_not_met_ela_frpm_35$ci
## lower upper
## 0.06683512 22.00395665
rdd_plot_avg_pct_not_met_ela_frpm_35 <- gp_rdd_plot(rdd_avg_pct_not_met_ela_frpm_35) +
geom_vline(xintercept = 35, linetype = "dashed") +
coord_cartesian(xlim = c(20, 60)) +
labs(title = "% Not Met ELA ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_pct_not_met_ela_frpm_35)
rdd_avg_pct_not_met_ela_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_not_met_ELA, 75)
rdd_avg_pct_not_met_ela_frpm_75$tau
## [1] -2.400395
rdd_avg_pct_not_met_ela_frpm_75$se
## [1] 5.939738
rdd_avg_pct_not_met_ela_frpm_75$ci
## lower upper
## -14.042066 9.241277
rdd_plot_avg_pct_not_met_ela_frpm_75 <- gp_rdd_plot(rdd_avg_pct_not_met_ela_frpm_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "% Not Met ELA ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_pct_not_met_ela_frpm_75)
rdd_avg_pct_not_met_ela_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_not_met_ELA, 55)
rdd_avg_pct_not_met_ela_undup_55$tau
## [1] 5.978147
rdd_avg_pct_not_met_ela_undup_55$se
## [1] 7.40842
rdd_avg_pct_not_met_ela_undup_55$ci
## lower upper
## -8.54209 20.49838
rdd_plot_avg_pct_not_met_ela_undup_55 <- gp_rdd_plot(rdd_avg_pct_not_met_ela_undup_55) +
geom_vline(xintercept = 55, linetype = "dashed") +
coord_cartesian(xlim = c(30, 80)) +
labs(title = "% Not Met ELA ~ Undup (55% Cutoff)")
print(rdd_plot_avg_pct_not_met_ela_undup_55)
rdd_avg_pct_not_met_ela_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_not_met_ELA, 75)
rdd_avg_pct_not_met_ela_undup_75$tau
## [1] 3.960924
rdd_avg_pct_not_met_ela_undup_75$se
## [1] 6.545555
rdd_avg_pct_not_met_ela_undup_75$ci
## lower upper
## -8.868129 16.789977
rdd_plot_avg_pct_not_met_ela_undup_75 <- gp_rdd_plot(rdd_avg_pct_not_met_ela_undup_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "% Not Met ELA ~ Undup (75% Cutoff)")
print(rdd_plot_avg_pct_not_met_ela_undup_75)
rdd_avg_pct_not_met_math_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_not_met_Math, 35)
rdd_avg_pct_not_met_math_frpm_35$tau
## [1] 14.2913
rdd_avg_pct_not_met_math_frpm_35$se
## [1] 6.88293
rdd_avg_pct_not_met_math_frpm_35$ci
## lower upper
## 0.801007 27.781598
rdd_plot_avg_pct_not_met_math_frpm_35 <- gp_rdd_plot(rdd_avg_pct_not_met_math_frpm_35) +
geom_vline(xintercept = 35, linetype = "dashed") +
coord_cartesian(xlim = c(20, 60)) +
labs(title = "% Not Met Math ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_frpm_35)
rdd_avg_pct_not_met_math_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_not_met_Math, 75)
rdd_avg_pct_not_met_math_frpm_75$tau
## [1] -3.591685
rdd_avg_pct_not_met_math_frpm_75$se
## [1] 7.159961
rdd_avg_pct_not_met_math_frpm_75$ci
## lower upper
## -17.62495 10.44158
rdd_plot_avg_pct_not_met_math_frpm_75 <- gp_rdd_plot(rdd_avg_pct_not_met_math_frpm_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "% Not Met Math ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_frpm_75)
rdd_avg_pct_not_met_math_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_not_met_Math, 55)
rdd_avg_pct_not_met_math_undup_55$tau
## [1] 4.95941
rdd_avg_pct_not_met_math_undup_55$se
## [1] 8.965036
rdd_avg_pct_not_met_math_undup_55$ci
## lower upper
## -12.61174 22.53056
rdd_plot_avg_pct_not_met_math_undup_55 <- gp_rdd_plot(rdd_avg_pct_not_met_math_undup_55) +
geom_vline(xintercept = 55, linetype = "dashed") +
coord_cartesian(xlim = c(30, 80)) +
labs(title = "% Not Met Math ~ Undup (55% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_undup_55)
rdd_avg_pct_not_met_math_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_not_met_Math, 75)
rdd_avg_pct_not_met_math_undup_75$tau
## [1] 1.601258
rdd_avg_pct_not_met_math_undup_75$se
## [1] 7.802175
rdd_avg_pct_not_met_math_undup_75$ci
## lower upper
## -13.69072 16.89324
rdd_plot_avg_pct_not_met_math_undup_75 <- gp_rdd_plot(rdd_avg_pct_not_met_math_undup_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "% Not Met Math ~ Undup (75% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_undup_75)
rdd_avg_pct_not_met_math_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_not_met_Math, 35)
rdd_avg_pct_not_met_math_frpm_35$tau
## [1] 14.2913
rdd_avg_pct_not_met_math_frpm_35$se
## [1] 6.88293
rdd_avg_pct_not_met_math_frpm_35$ci
## lower upper
## 0.801007 27.781598
rdd_plot_avg_pct_not_met_math_frpm_35 <- gp_rdd_plot(rdd_avg_pct_not_met_math_frpm_35) +
geom_vline(xintercept = 35, linetype = "dashed") +
coord_cartesian(xlim = c(20, 60)) +
labs(title = "% Not Met Math ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_frpm_35)
rdd_avg_pct_not_met_math_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_pct_not_met_Math, 75)
rdd_avg_pct_not_met_math_frpm_75$tau
## [1] -3.591685
rdd_avg_pct_not_met_math_frpm_75$se
## [1] 7.159961
rdd_avg_pct_not_met_math_frpm_75$ci
## lower upper
## -17.62495 10.44158
rdd_plot_avg_pct_not_met_math_frpm_75 <- gp_rdd_plot(rdd_avg_pct_not_met_math_frpm_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "% Not Met Math ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_frpm_75)
rdd_avg_pct_not_met_math_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_not_met_Math, 55)
rdd_avg_pct_not_met_math_undup_55$tau
## [1] 4.95941
rdd_avg_pct_not_met_math_undup_55$se
## [1] 8.965036
rdd_avg_pct_not_met_math_undup_55$ci
## lower upper
## -12.61174 22.53056
rdd_plot_avg_pct_not_met_math_undup_55 <- gp_rdd_plot(rdd_avg_pct_not_met_math_undup_55) +
geom_vline(xintercept = 55, linetype = "dashed") +
coord_cartesian(xlim = c(30, 80)) +
labs(title = "% Not Met Math ~ Undup (55% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_undup_55)
rdd_avg_pct_not_met_math_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_pct_not_met_Math, 75)
rdd_avg_pct_not_met_math_undup_75$tau
## [1] 1.601258
rdd_avg_pct_not_met_math_undup_75$se
## [1] 7.802175
rdd_avg_pct_not_met_math_undup_75$ci
## lower upper
## -13.69072 16.89324
rdd_plot_avg_pct_not_met_math_undup_75 <- gp_rdd_plot(rdd_avg_pct_not_met_math_undup_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "% Not Met Math ~ Undup (75% Cutoff)")
print(rdd_plot_avg_pct_not_met_math_undup_75)
rdd_avg_scale_score_ela_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_scale_score_ELA, 35)
rdd_avg_scale_score_ela_frpm_35$tau
## [1] -27.88893
rdd_avg_scale_score_ela_frpm_35$se
## [1] 26.80531
rdd_avg_scale_score_ela_frpm_35$ci
## lower upper
## -80.42637 24.64851
rdd_plot_avg_scale_score_ela_frpm_35 <- gp_rdd_plot(rdd_avg_scale_score_ela_frpm_35) +
geom_vline(xintercept = 35, linetype = "dashed") +
coord_cartesian(xlim = c(20, 60)) +
labs(title = "Scale Score ELA ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_scale_score_ela_frpm_35)
rdd_avg_scale_score_ela_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_scale_score_ELA, 75)
rdd_avg_scale_score_ela_frpm_75$tau
## [1] -0.052738
rdd_avg_scale_score_ela_frpm_75$se
## [1] 26.98478
rdd_avg_scale_score_ela_frpm_75$ci
## lower upper
## -52.94193 52.83646
rdd_plot_avg_scale_score_ela_frpm_75 <- gp_rdd_plot(rdd_avg_scale_score_ela_frpm_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "Scale Score ELA ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_scale_score_ela_frpm_75)
rdd_avg_scale_score_ela_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_scale_score_ELA, 55)
rdd_avg_scale_score_ela_undup_55$tau
## [1] -15.09553
rdd_avg_scale_score_ela_undup_55$se
## [1] 33.50612
rdd_avg_scale_score_ela_undup_55$ci
## lower upper
## -80.76632 50.57525
rdd_plot_avg_scale_score_ela_undup_55 <- gp_rdd_plot(rdd_avg_scale_score_ela_undup_55) +
geom_vline(xintercept = 55, linetype = "dashed") +
coord_cartesian(xlim = c(30, 80)) +
labs(title = "Scale Score ELA ~ Undup (55% Cutoff)")
print(rdd_plot_avg_scale_score_ela_undup_55)
rdd_avg_scale_score_ela_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_scale_score_ELA, 75)
rdd_avg_scale_score_ela_undup_75$tau
## [1] -30.14377
rdd_avg_scale_score_ela_undup_75$se
## [1] 29.37465
rdd_avg_scale_score_ela_undup_75$ci
## lower upper
## -87.71703 27.42948
rdd_plot_avg_scale_score_ela_undup_75 <- gp_rdd_plot(rdd_avg_scale_score_ela_undup_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "Scale Score ELA ~ Undup (75% Cutoff)")
print(rdd_plot_avg_scale_score_ela_undup_75)
rdd_avg_scale_score_math_frpm_35 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_scale_score_Math, 35)
rdd_avg_scale_score_math_frpm_35$tau
## [1] -35.14925
rdd_avg_scale_score_math_frpm_35$se
## [1] 22.46183
rdd_avg_scale_score_math_frpm_35$ci
## lower upper
## -79.173619 8.875122
rdd_plot_avg_scale_score_math_frpm_35 <- gp_rdd_plot(rdd_avg_scale_score_math_frpm_35) +
geom_vline(xintercept = 35, linetype = "dashed") +
coord_cartesian(xlim = c(20, 60)) +
labs(title = "Scale Score Math ~ FRPM (35% Cutoff)")
print(rdd_plot_avg_scale_score_math_frpm_35)
rdd_avg_scale_score_math_frpm_75 <- gp_rdd(df_clean$frpm_percent, df_clean$avg_scale_score_Math, 75)
rdd_avg_scale_score_math_frpm_75$tau
## [1] -3.229631
rdd_avg_scale_score_math_frpm_75$se
## [1] 19.66241
rdd_avg_scale_score_math_frpm_75$ci
## lower upper
## -41.76724 35.30798
rdd_plot_avg_scale_score_math_frpm_75 <- gp_rdd_plot(rdd_avg_scale_score_math_frpm_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "Scale Score Math ~ FRPM (75% Cutoff)")
print(rdd_plot_avg_scale_score_math_frpm_75)
rdd_avg_scale_score_math_undup_55 <- gp_rdd(df_clean$undup_pct, df_clean$avg_scale_score_Math, 55)
rdd_avg_scale_score_math_undup_55$tau
## [1] -9.742052
rdd_avg_scale_score_math_undup_55$se
## [1] 25.18616
rdd_avg_scale_score_math_undup_55$ci
## lower upper
## -59.10602 39.62191
rdd_plot_avg_scale_score_math_undup_55 <- gp_rdd_plot(rdd_avg_scale_score_math_undup_55) +
geom_vline(xintercept = 55, linetype = "dashed") +
coord_cartesian(xlim = c(30, 80)) +
labs(title = "Scale Score Math ~ Undup (55% Cutoff)")
print(rdd_plot_avg_scale_score_math_undup_55)
rdd_avg_scale_score_math_undup_75 <- gp_rdd(df_clean$undup_pct, df_clean$avg_scale_score_Math, 75)
rdd_avg_scale_score_math_undup_75$tau
## [1] -26.1681
rdd_avg_scale_score_math_undup_75$se
## [1] 21.30703
rdd_avg_scale_score_math_undup_75$ci
## lower upper
## -67.9291 15.5929
rdd_plot_avg_scale_score_math_undup_75 <- gp_rdd_plot(rdd_avg_scale_score_math_undup_75) +
geom_vline(xintercept = 75, linetype = "dashed") +
coord_cartesian(xlim = c(50, 95)) +
labs(title = "Scale Score Math ~ Undup (75% Cutoff)")
print(rdd_plot_avg_scale_score_math_undup_75)
# Outcome Summary Table
# Utility function to extract results from a gp_rdd object
extract_rdd_result <- function(obj, label) {
tibble(
model = label,
tau = obj$tau,
se = obj$se,
ci_lower = obj$ci[1],
ci_upper = obj$ci[2],
p_value = 2 * pnorm(-abs(obj$tau / obj$se)),
significant = ifelse(p_value < 0.05, TRUE, FALSE)
)
}
# Combine all results into one table
gp_rdd_summary_table <- bind_rows(
extract_rdd_result(rdd_avg_pct_met_above_ela_frpm_35, "% MetAbove ELA ~ FRPM (35%)"),
extract_rdd_result(rdd_avg_pct_met_above_ela_frpm_75, "% MetAbove ELA ~ FRPM (75%)"),
extract_rdd_result(rdd_avg_pct_met_above_ela_undup_55, "% MetAbove ELA ~ Undup (55%)"),
extract_rdd_result(rdd_avg_pct_met_above_ela_undup_75, "% MetAbove ELA ~ Undup (75%)"),
extract_rdd_result(rdd_avg_pct_met_above_math_frpm_35, "% MetAbove Math ~ FRPM (35%)"),
extract_rdd_result(rdd_avg_pct_met_above_math_frpm_75, "% MetAbove Math ~ FRPM (75%)"),
extract_rdd_result(rdd_avg_pct_met_above_math_undup_55, "% MetAbove Math ~ Undup (55%)"),
extract_rdd_result(rdd_avg_pct_met_above_math_undup_75, "% MetAbove Math ~ Undup (75%)"),
extract_rdd_result(rdd_res_absenteeism_frpm_35_cutoff, "Chronic Absenteeism ~ FRPM (35%)"),
extract_rdd_result(rdd_res_absenteeism_frpm_75_cutoff, "Chronic Absenteeism ~ FRPM (75%)"),
extract_rdd_result(rdd_res_absenteeism_BTB_35_cutoff, "BTB ~ FRPM (35%)"),
extract_rdd_result(rdd_res_absenteeism_BTB_75_cutoff, "BTB ~ FRPM (75%)"),
extract_rdd_result(rdd_res_absenteeism_undup_55_cutoff, "Chronic Absenteeism ~ Undup (55%)"),
extract_rdd_result(rdd_res_absenteeism_undup_75_cutoff, "Chronic Absenteeism ~ Undup (75%)"),
extract_rdd_result(rdd_avg_pct_not_met_math_frpm_35, "% Not Met Math ~ FRPM (35%)"),
extract_rdd_result(rdd_avg_pct_not_met_math_frpm_75, "% Not Met Math ~ FRPM (75%)"),
extract_rdd_result(rdd_avg_pct_not_met_math_undup_55, "% Not Met Math ~ Undup (55%)"),
extract_rdd_result(rdd_avg_pct_not_met_math_undup_75, "% Not Met Math ~ Undup (75%)"),
extract_rdd_result(rdd_avg_scale_score_ela_frpm_35, "Scale ELA ~ FRPM (35%)"),
extract_rdd_result(rdd_avg_scale_score_ela_frpm_75, "Scale ELA ~ FRPM (75%)"),
extract_rdd_result(rdd_avg_scale_score_ela_undup_55, "Scale ELA ~ Undup (55%)"),
extract_rdd_result(rdd_avg_scale_score_ela_undup_75, "Scale ELA ~ Undup (75%)"),
extract_rdd_result(rdd_avg_scale_score_math_frpm_35, "Scale Math ~ FRPM (35%)"),
extract_rdd_result(rdd_avg_scale_score_math_frpm_75, "Scale Math ~ FRPM (75%)"),
extract_rdd_result(rdd_avg_scale_score_math_undup_55, "Scale Math ~ Undup (55%)"),
extract_rdd_result(rdd_avg_scale_score_math_undup_75, "Scale Math ~ Undup (75%)")
)
print(gp_rdd_summary_table, n = Inf)
## # A tibble: 26 × 7
## model tau se ci_lower ci_upper p_value significant
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 % MetAbove ELA ~ FRPM … -14.9 6.74 -28.1 -1.69 0.0271 TRUE
## 2 % MetAbove ELA ~ FRPM … 1.10 6.08 -10.8 13.0 0.856 FALSE
## 3 % MetAbove ELA ~ Undup… -6.08 7.77 -21.3 9.15 0.434 FALSE
## 4 % MetAbove ELA ~ Undup… -5.28 6.62 -18.3 7.70 0.425 FALSE
## 5 % MetAbove Math ~ FRPM… -18.5 7.43 -33.1 -3.96 0.0126 TRUE
## 6 % MetAbove Math ~ FRPM… -0.325 5.97 -12.0 11.4 0.957 FALSE
## 7 % MetAbove Math ~ Undu… -4.56 7.71 -19.7 10.6 0.555 FALSE
## 8 % MetAbove Math ~ Undu… -1.11 6.38 -13.6 11.4 0.862 FALSE
## 9 Chronic Absenteeism ~ … 7.87 5.74 -3.38 19.1 0.171 FALSE
## 10 Chronic Absenteeism ~ … -0.0740 6.06 -12.0 11.8 0.990 FALSE
## 11 BTB ~ FRPM (35%) -0.231 0.224 -0.669 0.208 0.303 FALSE
## 12 BTB ~ FRPM (75%) -0.159 0.208 -0.567 0.249 0.445 FALSE
## 13 Chronic Absenteeism ~ … 3.06 7.58 -11.8 17.9 0.687 FALSE
## 14 Chronic Absenteeism ~ … -1.67 6.68 -14.8 11.4 0.803 FALSE
## 15 % Not Met Math ~ FRPM … 14.3 6.88 0.801 27.8 0.0379 TRUE
## 16 % Not Met Math ~ FRPM … -3.59 7.16 -17.6 10.4 0.616 FALSE
## 17 % Not Met Math ~ Undup… 4.96 8.97 -12.6 22.5 0.580 FALSE
## 18 % Not Met Math ~ Undup… 1.60 7.80 -13.7 16.9 0.837 FALSE
## 19 Scale ELA ~ FRPM (35%) -27.9 26.8 -80.4 24.6 0.298 FALSE
## 20 Scale ELA ~ FRPM (75%) -0.0527 27.0 -52.9 52.8 0.998 FALSE
## 21 Scale ELA ~ Undup (55%) -15.1 33.5 -80.8 50.6 0.652 FALSE
## 22 Scale ELA ~ Undup (75%) -30.1 29.4 -87.7 27.4 0.305 FALSE
## 23 Scale Math ~ FRPM (35%) -35.1 22.5 -79.2 8.88 0.118 FALSE
## 24 Scale Math ~ FRPM (75%) -3.23 19.7 -41.8 35.3 0.870 FALSE
## 25 Scale Math ~ Undup (55… -9.74 25.2 -59.1 39.6 0.699 FALSE
## 26 Scale Math ~ Undup (75… -26.2 21.3 -67.9 15.6 0.219 FALSE
# ──────────────────────────────────────────────────────────────
# Balance Tests with GP-RDD for Two Running Variables (FRPM and UPP)
# ──────────────────────────────────────────────────────────────
library(dplyr)
library(tidyr)
library(gpss)
# -----------------------------
# Continuous Covariates Balance via GP-RDD
# -----------------------------
gp_rdd_balance_test_cont <- function(df, var, running_var, cutoff) {
df_sub <- df %>% filter(!is.na(.data[[var]]), !is.na(.data[[running_var]]))
rdd_result <- gp_rdd(df_sub[[running_var]], df_sub[[var]], cutoff)
tibble(
variable = var,
running_var = running_var,
cutoff = cutoff,
tau = rdd_result$tau,
se = rdd_result$se,
p_value = 2 * pnorm(-abs(rdd_result$tau / rdd_result$se)),
ci_lower = rdd_result$ci[1],
ci_upper = rdd_result$ci[2]
)
}
# -----------------------------
# Binary Categorical Covariates Balance via GP-RDD
# -----------------------------
gp_rdd_balance_test_binary <- function(df, var, running_var, cutoff) {
df_sub <- df %>% filter(!is.na(.data[[var]]), !is.na(.data[[running_var]]))
df_sub <- df_sub %>% mutate(dummy = as.numeric(trimws(.data[[var]]) == "Yes" | .data[[var]] == 1))
rdd_result <- gp_rdd(df_sub[[running_var]], df_sub$dummy, cutoff)
tibble(
variable = var,
running_var = running_var,
cutoff = cutoff,
tau = rdd_result$tau,
se = rdd_result$se,
p_value = 2 * pnorm(-abs(rdd_result$tau / rdd_result$se)),
ci_lower = rdd_result$ci[1],
ci_upper = rdd_result$ci[2]
)
}
# -----------------------------
# Run All Balance Tests
# -----------------------------
continuous_covariates <- c("pct_hispanic", "pct_black", "pct_white", "pct_asian",
"pct_two_or_more", "pct_other", "total_enroll")
categorical_covariates <- c("Charter.School", "DASS")
cutoffs <- list(
list(running = "frpm_rate", value = 0.35),
list(running = "frpm_rate", value = 0.75),
list(running = "undup_pct", value = 55),
list(running = "undup_pct", value = 75)
)
balance_all <- purrr::map_dfr(cutoffs, function(cut) {
cont_results <- purrr::map_dfr(continuous_covariates, ~gp_rdd_balance_test_cont(df_clean, .x, cut$running, cut$value))
cat_results <- purrr::map_dfr(categorical_covariates, ~gp_rdd_balance_test_binary(df_clean, .x, cut$running, cut$value))
bind_rows(cont_results, cat_results)
})
# -----------------------------
# Print results
# -----------------------------
print(balance_all)
## # A tibble: 36 × 8
## variable running_var cutoff tau se p_value ci_lower ci_upper
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 pct_hispanic frpm_rate 0.35 1.64e+1 6.82e+0 0.0162 3.04 29.8
## 2 pct_black frpm_rate 0.35 3.49e+0 5.10e+0 0.493 -6.50 13.5
## 3 pct_white frpm_rate 0.35 -1.17e+1 6.05e+0 0.0534 -23.5 0.171
## 4 pct_asian frpm_rate 0.35 -7.62e+0 4.42e+0 0.0848 -16.3 1.04
## 5 pct_two_or_more frpm_rate 0.35 6.36e-1 1.77e+0 0.719 -2.83 4.10
## 6 pct_other frpm_rate 0.35 -5.49e-1 4.83e+0 0.910 -10.0 8.92
## 7 total_enroll frpm_rate 0.35 -2.10e+1 3.05e+2 0.945 -618. 576.
## 8 Charter.School frpm_rate 0.35 1.53e-1 2.43e-1 0.531 -0.324 0.629
## 9 DASS frpm_rate 0.35 7.38e-3 8.48e-2 0.931 -0.159 0.174
## 10 pct_hispanic frpm_rate 0.75 6.27e+0 7.28e+0 0.389 -8.00 20.5
## # ℹ 26 more rows
library(dplyr)
library(gpss)
# Step 1: Prepare data with treatment variable and convert categorical vars to factor
df_frpm_35 <- df_clean %>%
mutate(
treatment = as.integer(frpm_percent >= 35),
Charter.School = factor(Charter.School),
DASS = factor(DASS)
) %>%
filter(complete.cases(across(all_of(c(
"chronic_absenteeism", "treatment", "frpm_percent",
"Charter.School", "DASS", "pct_hispanic", "pct_white", "pct_two_or_more"
)))))
# Step 2: Fit covariate-adjusted GP-RDD model
model_frpm_35 <- gpss(
chronic_absenteeism ~ treatment + frpm_percent + Charter.School + DASS +
pct_hispanic + pct_white + pct_two_or_more,
data = df_frpm_35
)
# Step 3: Create two counterfactual observations at the cutoff (treatment = 0 and 1)
new_data <- df_frpm_35 %>%
slice(1) %>%
slice(rep(1, 2)) %>%
mutate(
treatment = c(0, 1),
frpm_percent = 35,
Charter.School = factor("Yes", levels = levels(df_frpm_35$Charter.School)),
DASS = factor("Yes", levels = levels(df_frpm_35$DASS)),
pct_hispanic = mean(df_frpm_35$pct_hispanic, na.rm = TRUE),
pct_white = mean(df_frpm_35$pct_white, na.rm = TRUE),
pct_two_or_more = mean(df_frpm_35$pct_two_or_more, na.rm = TRUE)
)
# Step 4: Predict outcomes for both counterfactuals
preds <- predict(model_frpm_35, new_data)
## Warning in formula.character(object, env = baseenv()): Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
# Step 5: Estimate treatment effect and uncertainty
mu1 <- preds[2]
mu0 <- preds[1]
tau_hat <- mu1 - mu0
# Extract posterior covariance matrix for the predicted values
post_cov <- model_frpm_35$post_cov_orig
se_tau <- sqrt(post_cov[2,2] + post_cov[1,1] - 2 * post_cov[1,2])
# 95% CI
ci_lower <- tau_hat - 1.96 * se_tau
ci_upper <- tau_hat + 1.96 * se_tau
# p-value (two-sided test)
p_value <- 2 * pnorm(-abs(tau_hat / se_tau))
# Step 6: Output all results
print(tibble(
model = "chronic_absenteeism ~ frpm_percent adj @ 35%",
tau = tau_hat,
se = se_tau,
ci_lower = ci_lower,
ci_upper = ci_upper,
p_value = p_value,
significant = p_value < 0.05
))
## # A tibble: 1 × 7
## model tau se ci_lower ci_upper p_value significant
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 chronic_absenteeism ~ frpm_… 4.89 1.83 1.31 8.47 0.00745 TRUE
library(dplyr)
library(gpss) # >=0.2.0
library(tibble)
# ---- settings ------------------------------------------------------------
outcomes <- c(
"chronic_absenteeism",
"btb",
"avg_pct_met_above_ELA",
"avg_pct_met_above_Math",
"avg_pct_not_met_ELA",
"avg_pct_not_met_Math",
"avg_scale_score_ELA",
"avg_scale_score_Math"
)
runvar <- "frpm_percent"
cutoff <- 35
local_bw <- 5
cont_covs <- c("pct_hispanic", "pct_white", "pct_two_or_more")
all_results <- list()
for (outcome in outcomes) {
# prepare data for this outcome
df <- df_clean %>%
mutate(
Charter = as.numeric(trimws(Charter.School) == "Yes"),
DASS = as.numeric(trimws(DASS) == "Yes")
) %>%
filter(!is.na(.data[[outcome]]), !is.na(.data[[runvar]]))
# split
left <- df %>% filter(.data[[runvar]] < cutoff)
right <- df %>% filter(.data[[runvar]] >= cutoff)
if (nrow(left) < 1 || nrow(right) < 1) {
warning("Skipping ", outcome, ": insufficient data on one side of cutoff (left=", nrow(left), ", right=", nrow(right), ")")
next
}
# --- unadjusted ----------------------------------------------------------
fml_u <- reformulate(runvar, response = outcome)
mod_L_u <- tryCatch(gpss(fml_u, data = left, optimize = TRUE), error = function(e) { warning("gpss error (unadj) for ", outcome, ": ", e$message); return(NULL) })
mod_R_u <- tryCatch(gpss(fml_u, data = right, optimize = TRUE), error = function(e) { warning("gpss error (unadj) for ", outcome, ": ", e$message); return(NULL) })
if (is.null(mod_L_u) || is.null(mod_R_u)) next
profile_u <- tibble(!!runvar := cutoff)
pred_L_u <- gp_predict(mod_L_u, profile_u)
pred_R_u <- gp_predict(mod_R_u, profile_u)
if (is.null(pred_L_u$Ys_mean_orig) || is.null(pred_R_u$Ys_mean_orig) ||
is.null(pred_L_u$Ys_cov_orig) || is.null(pred_R_u$Ys_cov_orig)) {
warning("Skipping unadjusted for ", outcome, ": missing prediction components")
} else {
mu_L_u <- drop(pred_L_u$Ys_mean_orig)
mu_R_u <- drop(pred_R_u$Ys_mean_orig)
var_L_u <- drop(pred_L_u$Ys_cov_orig)
var_R_u <- drop(pred_R_u$Ys_cov_orig)
tau_u <- as.numeric(mu_R_u - mu_L_u)
se_u <- sqrt(var_L_u + var_R_u)
ci_u <- tau_u + c(-1.96, 1.96) * se_u
pval_u <- 2 * pnorm(-abs(tau_u / se_u))
result_unadj <- tibble(
outcome = outcome,
running_var = runvar,
cutoff = cutoff,
model = "Unadjusted",
tau = tau_u,
se = se_u,
ci_lower = ci_u[1],
ci_upper = ci_u[2],
p_value = pval_u,
significant = pval_u < 0.05
)
all_results <- append(all_results, list(result_unadj))
}
# --- covariate-adjusted --------------------------------------------------
df_adj <- df %>% filter(complete.cases(across(all_of(c(cont_covs, "Charter", "DASS")))))
left_a <- df_adj %>% filter(.data[[runvar]] < cutoff)
right_a <- df_adj %>% filter(.data[[runvar]] >= cutoff)
if (nrow(left_a) < 1 || nrow(right_a) < 1) {
warning("Skipping covariate-adjusted for ", outcome, ": insufficient adjusted data (left=", nrow(left_a), ", right=", nrow(right_a), ")")
next
}
predictors <- c(runvar, cont_covs, "Charter", "DASS")
fml_a <- reformulate(predictors, response = outcome)
mod_L_a <- tryCatch(gpss(fml_a, data = left_a, optimize = TRUE), error = function(e) { warning("gpss error (adj) for ", outcome, ": ", e$message); return(NULL) })
mod_R_a <- tryCatch(gpss(fml_a, data = right_a, optimize = TRUE), error = function(e) { warning("gpss error (adj) for ", outcome, ": ", e$message); return(NULL) })
if (is.null(mod_L_a) || is.null(mod_R_a)) next
profile_a <- df_adj %>%
filter(between(.data[[runvar]], cutoff - local_bw, cutoff + local_bw)) %>%
summarise(
across(all_of(cont_covs), ~ mean(.x, na.rm = TRUE)),
Charter = mean(Charter, na.rm = TRUE),
DASS = mean(DASS, na.rm = TRUE)
)
profile_a[[runvar]] <- cutoff
profile_a <- profile_a %>% select(all_of(predictors))
pred_L_a <- gp_predict(mod_L_a, profile_a)
pred_R_a <- gp_predict(mod_R_a, profile_a)
if (is.null(pred_L_a$Ys_mean_orig) || is.null(pred_R_a$Ys_mean_orig) ||
is.null(pred_L_a$Ys_cov_orig) || is.null(pred_R_a$Ys_cov_orig)) {
warning("Skipping covariate-adjusted for ", outcome, ": missing prediction components")
next
}
mu_L_a <- drop(pred_L_a$Ys_mean_orig)
mu_R_a <- drop(pred_R_a$Ys_mean_orig)
var_L_a <- drop(pred_L_a$Ys_cov_orig)
var_R_a <- drop(pred_R_a$Ys_cov_orig)
tau_a <- as.numeric(mu_R_a - mu_L_a)
se_a <- sqrt(var_L_a + var_R_a)
ci_a <- tau_a + c(-1.96, 1.96) * se_a
pval_a <- 2 * pnorm(-abs(tau_a / se_a))
result_adj <- tibble(
outcome = outcome,
running_var = runvar,
cutoff = cutoff,
model = "Covariate-adjusted",
tau = tau_a,
se = se_a,
ci_lower = ci_a[1],
ci_upper = ci_a[2],
p_value = pval_a,
significant = pval_a < 0.05
)
all_results <- append(all_results, list(result_adj))
}
# combine everything
comparison <- bind_rows(all_results)
print(comparison)
## # A tibble: 16 × 10
## outcome running_var cutoff model tau se ci_lower ci_upper p_value
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 chronic_a… frpm_perce… 35 Unad… 7.87 17.0 -25.5 41.3 0.644
## 2 chronic_a… frpm_perce… 35 Cova… 4.85 13.9 -22.5 32.2 0.728
## 3 btb frpm_perce… 35 Unad… -0.231 0.650 -1.51 1.04 0.723
## 4 btb frpm_perce… 35 Cova… 0.0179 0.527 -1.01 1.05 0.973
## 5 avg_pct_m… frpm_perce… 35 Unad… -14.9 19.0 -52.1 22.3 0.432
## 6 avg_pct_m… frpm_perce… 35 Cova… -5.15 17.4 -39.2 28.9 0.767
## 7 avg_pct_m… frpm_perce… 35 Unad… -18.5 20.4 -58.5 21.5 0.364
## 8 avg_pct_m… frpm_perce… 35 Cova… -9.30 18.3 -45.1 26.5 0.610
## 9 avg_pct_n… frpm_perce… 35 Unad… 11.0 16.4 -21.2 43.2 0.502
## 10 avg_pct_n… frpm_perce… 35 Cova… 4.28 15.3 -25.7 34.3 0.780
## 11 avg_pct_n… frpm_perce… 35 Unad… 14.3 20.1 -25.2 53.8 0.478
## 12 avg_pct_n… frpm_perce… 35 Cova… 4.48 18.1 -31.0 40.0 0.805
## 13 avg_scale… frpm_perce… 35 Unad… -27.9 79.4 -184. 128. 0.725
## 14 avg_scale… frpm_perce… 35 Cova… -9.07 82.7 -171. 153. 0.913
## 15 avg_scale… frpm_perce… 35 Unad… -35.1 64.0 -161. 90.3 0.583
## 16 avg_scale… frpm_perce… 35 Cova… -17.4 64.5 -144. 109. 0.788
## # ℹ 1 more variable: significant <lgl>
library(dplyr)
library(gpss) # >=0.2.0
library(tibble)
# ---- settings ------------------------------------------------------------
outcomes <- c(
"chronic_absenteeism",
"btb",
"avg_pct_met_above_ELA",
"avg_pct_met_above_Math",
"avg_pct_not_met_ELA",
"avg_pct_not_met_Math",
"avg_scale_score_ELA",
"avg_scale_score_Math"
)
runvar <- "frpm_percent"
cutoff <- 75
local_bw <- 5
cont_covs <- c("pct_hispanic", "pct_white", "pct_two_or_more")
all_results <- list()
for (outcome in outcomes) {
# prepare data for this outcome
df <- df_clean %>%
mutate(
Charter = as.numeric(trimws(Charter.School) == "Yes"),
DASS = as.numeric(trimws(DASS) == "Yes")
) %>%
filter(!is.na(.data[[outcome]]), !is.na(.data[[runvar]]))
# split
left <- df %>% filter(.data[[runvar]] < cutoff)
right <- df %>% filter(.data[[runvar]] >= cutoff)
if (nrow(left) < 1 || nrow(right) < 1) {
warning("Skipping ", outcome, ": insufficient data on one side of cutoff (left=", nrow(left), ", right=", nrow(right), ")")
next
}
# --- unadjusted ----------------------------------------------------------
fml_u <- reformulate(runvar, response = outcome)
mod_L_u <- tryCatch(gpss(fml_u, data = left, optimize = TRUE), error = function(e) { warning("gpss error (unadj) for ", outcome, ": ", e$message); return(NULL) })
mod_R_u <- tryCatch(gpss(fml_u, data = right, optimize = TRUE), error = function(e) { warning("gpss error (unadj) for ", outcome, ": ", e$message); return(NULL) })
if (is.null(mod_L_u) || is.null(mod_R_u)) next
profile_u <- tibble(!!runvar := cutoff)
pred_L_u <- gp_predict(mod_L_u, profile_u)
pred_R_u <- gp_predict(mod_R_u, profile_u)
if (is.null(pred_L_u$Ys_mean_orig) || is.null(pred_R_u$Ys_mean_orig) ||
is.null(pred_L_u$Ys_cov_orig) || is.null(pred_R_u$Ys_cov_orig)) {
warning("Skipping unadjusted for ", outcome, ": missing prediction components")
} else {
mu_L_u <- drop(pred_L_u$Ys_mean_orig)
mu_R_u <- drop(pred_R_u$Ys_mean_orig)
var_L_u <- drop(pred_L_u$Ys_cov_orig)
var_R_u <- drop(pred_R_u$Ys_cov_orig)
tau_u <- as.numeric(mu_R_u - mu_L_u)
se_u <- sqrt(var_L_u + var_R_u)
ci_u <- tau_u + c(-1.96, 1.96) * se_u
pval_u <- 2 * pnorm(-abs(tau_u / se_u))
result_unadj <- tibble(
outcome = outcome,
running_var = runvar,
cutoff = cutoff,
model = "Unadjusted",
tau = tau_u,
se = se_u,
ci_lower = ci_u[1],
ci_upper = ci_u[2],
p_value = pval_u,
significant = pval_u < 0.05
)
all_results <- append(all_results, list(result_unadj))
}
# --- covariate-adjusted --------------------------------------------------
df_adj <- df %>% filter(complete.cases(across(all_of(c(cont_covs, "Charter", "DASS")))))
left_a <- df_adj %>% filter(.data[[runvar]] < cutoff)
right_a <- df_adj %>% filter(.data[[runvar]] >= cutoff)
if (nrow(left_a) < 1 || nrow(right_a) < 1) {
warning("Skipping covariate-adjusted for ", outcome, ": insufficient adjusted data (left=", nrow(left_a), ", right=", nrow(right_a), ")")
next
}
predictors <- c(runvar, cont_covs, "Charter", "DASS")
fml_a <- reformulate(predictors, response = outcome)
mod_L_a <- tryCatch(gpss(fml_a, data = left_a, optimize = TRUE), error = function(e) { warning("gpss error (adj) for ", outcome, ": ", e$message); return(NULL) })
mod_R_a <- tryCatch(gpss(fml_a, data = right_a, optimize = TRUE), error = function(e) { warning("gpss error (adj) for ", outcome, ": ", e$message); return(NULL) })
if (is.null(mod_L_a) || is.null(mod_R_a)) next
profile_a <- df_adj %>%
filter(between(.data[[runvar]], cutoff - local_bw, cutoff + local_bw)) %>%
summarise(
across(all_of(cont_covs), ~ mean(.x, na.rm = TRUE)),
Charter = mean(Charter, na.rm = TRUE),
DASS = mean(DASS, na.rm = TRUE)
)
profile_a[[runvar]] <- cutoff
profile_a <- profile_a %>% select(all_of(predictors))
pred_L_a <- gp_predict(mod_L_a, profile_a)
pred_R_a <- gp_predict(mod_R_a, profile_a)
if (is.null(pred_L_a$Ys_mean_orig) || is.null(pred_R_a$Ys_mean_orig) ||
is.null(pred_L_a$Ys_cov_orig) || is.null(pred_R_a$Ys_cov_orig)) {
warning("Skipping covariate-adjusted for ", outcome, ": missing prediction components")
next
}
mu_L_a <- drop(pred_L_a$Ys_mean_orig)
mu_R_a <- drop(pred_R_a$Ys_mean_orig)
var_L_a <- drop(pred_L_a$Ys_cov_orig)
var_R_a <- drop(pred_R_a$Ys_cov_orig)
tau_a <- as.numeric(mu_R_a - mu_L_a)
se_a <- sqrt(var_L_a + var_R_a)
ci_a <- tau_a + c(-1.96, 1.96) * se_a
pval_a <- 2 * pnorm(-abs(tau_a / se_a))
result_adj <- tibble(
outcome = outcome,
running_var = runvar,
cutoff = cutoff,
model = "Covariate-adjusted",
tau = tau_a,
se = se_a,
ci_lower = ci_a[1],
ci_upper = ci_a[2],
p_value = pval_a,
significant = pval_a < 0.05
)
all_results <- append(all_results, list(result_adj))
}
# combine everything
comparison <- bind_rows(all_results)
print(comparison)
## # A tibble: 16 × 10
## outcome running_var cutoff model tau se ci_lower ci_upper p_value
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 chronic_ab… frpm_perce… 75 Unad… -0.0740 19.0 -37.3 37.1 0.997
## 2 chronic_ab… frpm_perce… 75 Cova… -1.83 15.3 -31.8 28.1 0.905
## 3 btb frpm_perce… 75 Unad… -0.159 0.693 -1.52 1.20 0.818
## 4 btb frpm_perce… 75 Cova… -0.0461 0.554 -1.13 1.04 0.934
## 5 avg_pct_me… frpm_perce… 75 Unad… 1.10 19.4 -37.0 39.2 0.955
## 6 avg_pct_me… frpm_perce… 75 Cova… 3.17 19.8 -35.6 42.0 0.873
## 7 avg_pct_me… frpm_perce… 75 Unad… -0.325 20.3 -40.1 39.5 0.987
## 8 avg_pct_me… frpm_perce… 75 Cova… 3.85 21.0 -37.3 45.0 0.855
## 9 avg_pct_no… frpm_perce… 75 Unad… -2.40 18.2 -38.1 33.3 0.895
## 10 avg_pct_no… frpm_perce… 75 Cova… -5.08 17.6 -39.6 29.5 0.773
## 11 avg_pct_no… frpm_perce… 75 Unad… -3.59 22.7 -48.0 40.8 0.874
## 12 avg_pct_no… frpm_perce… 75 Cova… -8.99 21.5 -51.1 33.1 0.675
## 13 avg_scale_… frpm_perce… 75 Unad… -0.0527 88.5 -174. 173. 1.00
## 14 avg_scale_… frpm_perce… 75 Cova… -5.86 93.8 -190. 178. 0.950
## 15 avg_scale_… frpm_perce… 75 Unad… -3.23 66.2 -133. 127. 0.961
## 16 avg_scale_… frpm_perce… 75 Cova… -2.06 69.6 -138. 134. 0.976
## # ℹ 1 more variable: significant <lgl>
library(dplyr)
library(gpss) # >=0.2.0
library(tibble)
# ---- settings ------------------------------------------------------------
outcomes <- c(
"chronic_absenteeism",
"btb",
"avg_pct_met_above_ELA",
"avg_pct_met_above_Math",
"avg_pct_not_met_ELA",
"avg_pct_not_met_Math",
"avg_scale_score_ELA",
"avg_scale_score_Math"
)
runvar <- "undup_pct"
cutoff <- 40
local_bw <- 5
cont_covs <- c("pct_hispanic", "pct_white", "pct_two_or_more")
all_results <- list()
for (outcome in outcomes) {
# prepare data for this outcome
df <- df_clean %>%
mutate(
Charter = as.numeric(trimws(Charter.School) == "Yes"),
DASS = as.numeric(trimws(DASS) == "Yes")
) %>%
filter(!is.na(.data[[outcome]]), !is.na(.data[[runvar]]))
# split
left <- df %>% filter(.data[[runvar]] < cutoff)
right <- df %>% filter(.data[[runvar]] >= cutoff)
if (nrow(left) < 1 || nrow(right) < 1) {
warning("Skipping ", outcome, ": insufficient data on one side of cutoff (left=", nrow(left), ", right=", nrow(right), ")")
next
}
# --- unadjusted ----------------------------------------------------------
fml_u <- reformulate(runvar, response = outcome)
mod_L_u <- tryCatch(gpss(fml_u, data = left, optimize = TRUE), error = function(e) { warning("gpss error (unadj) for ", outcome, ": ", e$message); return(NULL) })
mod_R_u <- tryCatch(gpss(fml_u, data = right, optimize = TRUE), error = function(e) { warning("gpss error (unadj) for ", outcome, ": ", e$message); return(NULL) })
if (is.null(mod_L_u) || is.null(mod_R_u)) next
profile_u <- tibble(!!runvar := cutoff)
pred_L_u <- gp_predict(mod_L_u, profile_u)
pred_R_u <- gp_predict(mod_R_u, profile_u)
if (is.null(pred_L_u$Ys_mean_orig) || is.null(pred_R_u$Ys_mean_orig) ||
is.null(pred_L_u$Ys_cov_orig) || is.null(pred_R_u$Ys_cov_orig)) {
warning("Skipping unadjusted for ", outcome, ": missing prediction components")
} else {
mu_L_u <- drop(pred_L_u$Ys_mean_orig)
mu_R_u <- drop(pred_R_u$Ys_mean_orig)
var_L_u <- drop(pred_L_u$Ys_cov_orig)
var_R_u <- drop(pred_R_u$Ys_cov_orig)
tau_u <- as.numeric(mu_R_u - mu_L_u)
se_u <- sqrt(var_L_u + var_R_u)
ci_u <- tau_u + c(-1.96, 1.96) * se_u
pval_u <- 2 * pnorm(-abs(tau_u / se_u))
result_unadj <- tibble(
outcome = outcome,
running_var = runvar,
cutoff = cutoff,
model = "Unadjusted",
tau = tau_u,
se = se_u,
ci_lower = ci_u[1],
ci_upper = ci_u[2],
p_value = pval_u,
significant = pval_u < 0.05
)
all_results <- append(all_results, list(result_unadj))
}
# --- covariate-adjusted --------------------------------------------------
df_adj <- df %>% filter(complete.cases(across(all_of(c(cont_covs, "Charter", "DASS")))))
left_a <- df_adj %>% filter(.data[[runvar]] < cutoff)
right_a <- df_adj %>% filter(.data[[runvar]] >= cutoff)
if (nrow(left_a) < 1 || nrow(right_a) < 1) {
warning("Skipping covariate-adjusted for ", outcome, ": insufficient adjusted data (left=", nrow(left_a), ", right=", nrow(right_a), ")")
next
}
predictors <- c(runvar, cont_covs, "Charter", "DASS")
fml_a <- reformulate(predictors, response = outcome)
mod_L_a <- tryCatch(gpss(fml_a, data = left_a, optimize = TRUE), error = function(e) { warning("gpss error (adj) for ", outcome, ": ", e$message); return(NULL) })
mod_R_a <- tryCatch(gpss(fml_a, data = right_a, optimize = TRUE), error = function(e) { warning("gpss error (adj) for ", outcome, ": ", e$message); return(NULL) })
if (is.null(mod_L_a) || is.null(mod_R_a)) next
profile_a <- df_adj %>%
filter(between(.data[[runvar]], cutoff - local_bw, cutoff + local_bw)) %>%
summarise(
across(all_of(cont_covs), ~ mean(.x, na.rm = TRUE)),
Charter = mean(Charter, na.rm = TRUE),
DASS = mean(DASS, na.rm = TRUE)
)
profile_a[[runvar]] <- cutoff
profile_a <- profile_a %>% select(all_of(predictors))
pred_L_a <- gp_predict(mod_L_a, profile_a)
pred_R_a <- gp_predict(mod_R_a, profile_a)
if (is.null(pred_L_a$Ys_mean_orig) || is.null(pred_R_a$Ys_mean_orig) ||
is.null(pred_L_a$Ys_cov_orig) || is.null(pred_R_a$Ys_cov_orig)) {
warning("Skipping covariate-adjusted for ", outcome, ": missing prediction components")
next
}
mu_L_a <- drop(pred_L_a$Ys_mean_orig)
mu_R_a <- drop(pred_R_a$Ys_mean_orig)
var_L_a <- drop(pred_L_a$Ys_cov_orig)
var_R_a <- drop(pred_R_a$Ys_cov_orig)
tau_a <- as.numeric(mu_R_a - mu_L_a)
se_a <- sqrt(var_L_a + var_R_a)
ci_a <- tau_a + c(-1.96, 1.96) * se_a
pval_a <- 2 * pnorm(-abs(tau_a / se_a))
result_adj <- tibble(
outcome = outcome,
running_var = runvar,
cutoff = cutoff,
model = "Covariate-adjusted",
tau = tau_a,
se = se_a,
ci_lower = ci_a[1],
ci_upper = ci_a[2],
p_value = pval_a,
significant = pval_a < 0.05
)
all_results <- append(all_results, list(result_adj))
}
# combine everything
comparison <- bind_rows(all_results)
print(comparison)
## # A tibble: 16 × 10
## outcome running_var cutoff model tau se ci_lower ci_upper p_value
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 chronic_a… undup_pct 40 Unad… 1.43 18.1 -34.0 36.9 0.937
## 2 chronic_a… undup_pct 40 Cova… -1.89 15.3 -31.9 28.1 0.902
## 3 btb undup_pct 40 Unad… 0.276 0.679 -1.06 1.61 0.685
## 4 btb undup_pct 40 Cova… -0.0789 0.548 -1.15 0.995 0.885
## 5 avg_pct_m… undup_pct 40 Unad… -7.41 19.7 -45.9 31.1 0.706
## 6 avg_pct_m… undup_pct 40 Cova… -2.07 18.4 -38.1 34.0 0.911
## 7 avg_pct_m… undup_pct 40 Unad… -6.34 20.9 -47.3 34.6 0.761
## 8 avg_pct_m… undup_pct 40 Cova… -1.07 19.4 -39.2 37.0 0.956
## 9 avg_pct_n… undup_pct 40 Unad… 7.64 17.3 -26.3 41.5 0.659
## 10 avg_pct_n… undup_pct 40 Cova… 4.07 16.2 -27.8 35.9 0.802
## 11 avg_pct_n… undup_pct 40 Unad… 3.88 21.0 -37.2 45.0 0.853
## 12 avg_pct_n… undup_pct 40 Cova… 0.0787 19.4 -37.9 38.0 0.997
## 13 avg_scale… undup_pct 40 Unad… -37.0 83.5 -201. 127. 0.657
## 14 avg_scale… undup_pct 40 Cova… -30.0 84.7 -196. 136. 0.724
## 15 avg_scale… undup_pct 40 Unad… -30.2 66.1 -160. 99.3 0.647
## 16 avg_scale… undup_pct 40 Cova… -27.6 64.9 -155. 99.6 0.671
## # ℹ 1 more variable: significant <lgl>
library(dplyr)
library(tibble)
# ---- extended gp_rdd that accepts covariates --------------------------------
gp_rdd_cov <- function(X, Y, cut, covs = NULL,
alpha = 0.05,
b = NULL,
trim = FALSE,
trim_k_value = 0.1,
scale = TRUE,
local_bw = 5) {
cutpoint_scalar <- cut
# handle missingness
if (!is.null(covs)) {
complete_idx <- complete.cases(X, Y, covs)
covs <- covs[complete_idx, , drop = FALSE]
} else {
complete_idx <- complete.cases(X, Y)
}
X <- as.numeric(X[complete_idx])
Y <- as.numeric(Y[complete_idx])
if (!is.null(covs)) covs <- covs[complete_idx, , drop = FALSE]
# split left/right
left_idx <- which(X < cutpoint_scalar)
right_idx <- which(X >= cutpoint_scalar)
X_left <- X[left_idx]
X_right <- X[right_idx]
Y_left <- Y[left_idx]
Y_right <- Y[right_idx]
covs_left <- if (!is.null(covs)) covs[left_idx, , drop = FALSE] else NULL
covs_right <- if (!is.null(covs)) covs[right_idx, , drop = FALSE] else NULL
# trimming logic
if (isTRUE(trim) & isTRUE(scale)) {
stop("If `trim==TRUE`, scale must be FALSE")
}
if (isTRUE(trim)) {
b_left <- getb_maxvar(X_left)
b_right <- getb_maxvar(X_right)
trim_at_left <- cutpoint_scalar - sqrt(-1 * b_left * log(trim_k_value))
trim_at_right <- cutpoint_scalar + sqrt(-1 * b_right * log(trim_k_value))
keep_left <- X_left > trim_at_left
keep_right <- X_right < trim_at_right
X_left <- X_left[keep_left]
Y_left <- Y_left[keep_left]
if (!is.null(covs_left)) covs_left <- covs_left[keep_left, , drop = FALSE]
X_right <- X_right[keep_right]
Y_right <- Y_right[keep_right]
if (!is.null(covs_right)) covs_right <- covs_right[keep_right, , drop = FALSE]
} else {
b_left <- b
b_right <- b
}
# construct training design matrices
build_X_mat <- function(x_vec, cov_df) {
if (is.null(cov_df)) {
matrix(x_vec, ncol = 1, dimnames = list(NULL, "X"))
} else {
as.matrix(cbind(X = x_vec, cov_df))
}
}
Xmat_left <- build_X_mat(X_left, covs_left)
Xmat_right <- build_X_mat(X_right, covs_right)
# fit GPs (optimize=TRUE to mirror original)
gp_train_l <- gp_train(X = Xmat_left, Y = Y_left, b = b_left, scale = scale, optimize = TRUE)
gp_train_r <- gp_train(X = Xmat_right, Y = Y_right, b = b_right, scale = scale, optimize = TRUE)
# build prediction row matching training design
if (!is.null(covs)) {
window_idx <- which(X >= (cutpoint_scalar - local_bw) & X <= (cutpoint_scalar + local_bw))
if (length(window_idx) == 0) {
warning("No observations in local_bw window for covariate profile; using global means instead.")
cov_profile <- colMeans(covs, na.rm = TRUE)
} else {
cov_profile <- colMeans(covs[window_idx, , drop = FALSE], na.rm = TRUE)
}
pred_df <- as.data.frame(t(c(cutpoint_scalar, cov_profile)))
colnames(pred_df) <- colnames(Xmat_left) # assumes same structure left/right
} else {
pred_df <- as.data.frame(t(c(cutpoint_scalar)))
colnames(pred_df) <- colnames(Xmat_left) # should be "X"
cov_profile <- NULL
}
# predictions
gp_pred_l <- gp_predict(gp_train_l, pred_df)
gp_pred_r <- gp_predict(gp_train_r, pred_df)
# treatment effect and uncertainty
pred_l <- gp_pred_l$Ys_mean_orig[1]
pred_r <- gp_pred_r$Ys_mean_orig[1]
tau <- pred_r - pred_l
se <- sqrt(diag(gp_pred_l$f_cov_orig)[1] + diag(gp_pred_r$f_cov_orig)[1])
ci <- c(lower = tau - qnorm(1 - alpha / 2) * se,
upper = tau + qnorm(1 - alpha / 2) * se)
results <- list(
tau = tau, se = se, ci = ci,
pred_l = pred_l, pred_r = pred_r,
b_left = gp_train_l$b, b_right = gp_train_r$b,
s2_left = gp_train_l$s2, s2_right = gp_train_r$s2,
n_left = nrow(Xmat_left), n_right = nrow(Xmat_right),
trim = trim,
covariate_profile = cov_profile,
X = X, Y = Y,
gp_train_l = gp_train_l,
gp_train_r = gp_train_r,
cut = cutpoint_scalar
)
if (isTRUE(trim)) {
results <- append(results, c(trim_at_left = trim_at_left, trim_at_right = trim_at_right))
}
return(results)
}
# ---- prepare covariates --------------------------------------------------
covariates <- df_clean %>%
mutate(
Charter = as.numeric(trimws(Charter.School) == "Yes"),
DASS = as.numeric(trimws(DASS) == "Yes")
) %>%
select(Charter, DASS, pct_hispanic, pct_white, pct_two_or_more)
library(dplyr)
library(tibble)
# ---- fixed gp_rdd_cov that handles complete.cases correctly ----------
gp_rdd_cov <- function(X, Y, cut, covs = NULL,
alpha = 0.05,
b = NULL,
trim = FALSE,
trim_k_value = 0.1,
scale = TRUE,
local_bw = 5) {
cutpoint_scalar <- cut
# proper missingness handling: align X, Y, and covs
if (!is.null(covs)) {
temp_all <- as.data.frame(cbind(X = X, Y = Y, covs))
complete_idx <- complete.cases(temp_all)
X <- as.numeric(X[complete_idx])
Y <- as.numeric(Y[complete_idx])
covs <- covs[complete_idx, , drop = FALSE]
} else {
complete_idx <- complete.cases(X, Y)
X <- as.numeric(X[complete_idx])
Y <- as.numeric(Y[complete_idx])
}
# split left/right
left_idx <- which(X < cutpoint_scalar)
right_idx <- which(X >= cutpoint_scalar)
X_left <- X[left_idx]
X_right <- X[right_idx]
Y_left <- Y[left_idx]
Y_right <- Y[right_idx]
covs_left <- if (!is.null(covs)) covs[left_idx, , drop = FALSE] else NULL
covs_right <- if (!is.null(covs)) covs[right_idx, , drop = FALSE] else NULL
# trimming logic
if (isTRUE(trim) & isTRUE(scale)) {
stop("If `trim==TRUE`, scale must be FALSE")
}
if (isTRUE(trim)) {
b_left <- getb_maxvar(X_left)
b_right <- getb_maxvar(X_right)
trim_at_left <- cutpoint_scalar - sqrt(-1 * b_left * log(trim_k_value))
trim_at_right <- cutpoint_scalar + sqrt(-1 * b_right * log(trim_k_value))
keep_left <- X_left > trim_at_left
keep_right <- X_right < trim_at_right
X_left <- X_left[keep_left]
Y_left <- Y_left[keep_left]
if (!is.null(covs_left)) covs_left <- covs_left[keep_left, , drop = FALSE]
X_right <- X_right[keep_right]
Y_right <- Y_right[keep_right]
if (!is.null(covs_right)) covs_right <- covs_right[keep_right, , drop = FALSE]
} else {
b_left <- b
b_right <- b
}
# construct design matrices
build_X_mat <- function(x_vec, cov_df) {
if (is.null(cov_df)) {
matrix(x_vec, ncol = 1, dimnames = list(NULL, "X"))
} else {
as.matrix(cbind(X = x_vec, cov_df))
}
}
Xmat_left <- build_X_mat(X_left, covs_left)
Xmat_right <- build_X_mat(X_right, covs_right)
# fit GPs
gp_train_l <- gp_train(X = Xmat_left, Y = Y_left, b = b_left, scale = scale, optimize = TRUE)
gp_train_r <- gp_train(X = Xmat_right, Y = Y_right, b = b_right, scale = scale, optimize = TRUE)
# build prediction row matching training design
if (!is.null(covs)) {
window_idx <- which(X >= (cutpoint_scalar - local_bw) & X <= (cutpoint_scalar + local_bw))
if (length(window_idx) == 0) {
warning("No observations in local_bw window for covariate profile; using global means instead.")
cov_profile <- colMeans(covs, na.rm = TRUE)
} else {
cov_profile <- colMeans(covs[window_idx, , drop = FALSE], na.rm = TRUE)
}
pred_df <- as.data.frame(t(c(cutpoint_scalar, cov_profile)))
colnames(pred_df) <- colnames(Xmat_left)
} else {
pred_df <- as.data.frame(t(c(cutpoint_scalar)))
colnames(pred_df) <- colnames(Xmat_left)
cov_profile <- NULL
}
# predictions
gp_pred_l <- gp_predict(gp_train_l, pred_df)
gp_pred_r <- gp_predict(gp_train_r, pred_df)
# effect and uncertainty
pred_l <- gp_pred_l$Ys_mean_orig[1]
pred_r <- gp_pred_r$Ys_mean_orig[1]
tau <- pred_r - pred_l
se <- sqrt(diag(gp_pred_l$f_cov_orig)[1] + diag(gp_pred_r$f_cov_orig)[1])
ci <- c(lower = tau - qnorm(1 - alpha / 2) * se,
upper = tau + qnorm(1 - alpha / 2) * se)
results <- list(
tau = tau, se = se, ci = ci,
pred_l = pred_l, pred_r = pred_r,
b_left = gp_train_l$b, b_right = gp_train_r$b,
s2_left = gp_train_l$s2, s2_right = gp_train_r$s2,
n_left = nrow(Xmat_left), n_right = nrow(Xmat_right),
trim = trim,
covariate_profile = cov_profile,
X = X, Y = Y,
gp_train_l = gp_train_l,
gp_train_r = gp_train_r,
cut = cutpoint_scalar
)
if (isTRUE(trim)) {
results <- append(results, c(trim_at_left = trim_at_left, trim_at_right = trim_at_right))
}
return(results)
}
# ---- templated runner --------------------------------------------------
run_rdd_comparisons <- function(df, running_variable, cutoff, outcomes, covariate_names = c("Charter", "DASS", "pct_hispanic", "pct_white", "pct_two_or_more"), local_bw = 5) {
# prepare covariates once
covs_df <- df %>%
mutate(
Charter = as.numeric(trimws(Charter.School) == "Yes"),
DASS = as.numeric(trimws(DASS) == "Yes")
) %>%
select(all_of(covariate_names))
results <- vector("list", length(outcomes) * 3)
i <- 1L
for (outcome in outcomes) {
X <- df[[running_variable]]
Y <- df[[outcome]]
# 1. gp_rdd unadjusted
rdd_res <- gp_rdd(X, Y, cutoff, trim = FALSE, scale = TRUE)
tau_rdd <- rdd_res$tau
se_rdd <- rdd_res$se
ci_lo_rdd <- rdd_res$ci[1]
ci_hi_rdd <- rdd_res$ci[2]
pval_rdd <- 2 * pnorm(-abs(tau_rdd / se_rdd))
sig_rdd <- pval_rdd < 0.05
results[[i]] <- tibble(
outcome = outcome,
running_variable = running_variable,
cutoff = cutoff,
model = "gp_rdd",
tau = tau_rdd,
se = se_rdd,
ci_lower = ci_lo_rdd,
ci_upper = ci_hi_rdd,
p_value = pval_rdd,
significant = sig_rdd
)
i <- i + 1L
# 2. gp_rdd_cov unadjusted
rdd_cov_unadj <- gp_rdd_cov(X = X, Y = Y, cut = cutoff, covs = NULL, trim = FALSE, scale = TRUE, local_bw = local_bw)
tau_cov_unadj <- rdd_cov_unadj$tau
se_cov_unadj <- rdd_cov_unadj$se
ci_lo_cov_unadj <- rdd_cov_unadj$ci["lower"]
ci_hi_cov_unadj <- rdd_cov_unadj$ci["upper"]
pval_cov_unadj <- 2 * pnorm(-abs(tau_cov_unadj / se_cov_unadj))
sig_cov_unadj <- pval_cov_unadj < 0.05
results[[i]] <- tibble(
outcome = outcome,
running_variable = running_variable,
cutoff = cutoff,
model = "gp_rdd_cov (no covs)",
tau = tau_cov_unadj,
se = se_cov_unadj,
ci_lower = ci_lo_cov_unadj,
ci_upper = ci_hi_cov_unadj,
p_value = pval_cov_unadj,
significant = sig_cov_unadj
)
i <- i + 1L
# 3. gp_rdd_cov with covariate adjustment
rdd_cov_adj <- gp_rdd_cov(X = X, Y = Y, cut = cutoff, covs = covs_df, trim = FALSE, scale = TRUE, local_bw = local_bw)
tau_cov_adj <- rdd_cov_adj$tau
se_cov_adj <- rdd_cov_adj$se
ci_lo_cov_adj <- rdd_cov_adj$ci["lower"]
ci_hi_cov_adj <- rdd_cov_adj$ci["upper"]
pval_cov_adj <- 2 * pnorm(-abs(tau_cov_adj / se_cov_adj))
sig_cov_adj <- pval_cov_adj < 0.05
results[[i]] <- tibble(
outcome = outcome,
running_variable = running_variable,
cutoff = cutoff,
model = "gp_rdd_cov (covariate-adjusted)",
tau = tau_cov_adj,
se = se_cov_adj,
ci_lower = ci_lo_cov_adj,
ci_upper = ci_hi_cov_adj,
p_value = pval_cov_adj,
significant = sig_cov_adj
)
i <- i + 1L
}
bind_rows(results)
}
# ---- Usage FRPM 35% ------------------------------------------------------
outcomes <- c(
"chronic_absenteeism",
"btb",
"avg_pct_met_above_ELA",
"avg_pct_met_above_Math",
"avg_pct_not_met_ELA",
"avg_pct_not_met_Math",
"avg_scale_score_ELA",
"avg_scale_score_Math"
)
frpm_gp_rdd_35_comparison_table <- run_rdd_comparisons(
df = df_clean,
running_variable = "frpm_percent",
cutoff = 35,
outcomes = outcomes,
covariate_names = c("Charter", "DASS", "pct_hispanic", "pct_white", "pct_two_or_more"),
local_bw = 5
)
print(frpm_gp_rdd_35_comparison_table)
## # A tibble: 24 × 10
## outcome running_variable cutoff model tau se ci_lower ci_upper
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 chronic_absen… frpm_percent 35 gp_r… 7.87 5.74 -3.38 19.1
## 2 chronic_absen… frpm_percent 35 gp_r… 7.87 5.74 -3.38 19.1
## 3 chronic_absen… frpm_percent 35 gp_r… 4.85 8.40 -11.6 21.3
## 4 btb frpm_percent 35 gp_r… -0.231 0.224 -0.669 0.208
## 5 btb frpm_percent 35 gp_r… -0.231 0.224 -0.669 0.208
## 6 btb frpm_percent 35 gp_r… 0.0179 0.299 -0.568 0.604
## 7 avg_pct_met_a… frpm_percent 35 gp_r… -14.9 6.74 -28.1 -1.69
## 8 avg_pct_met_a… frpm_percent 35 gp_r… -14.9 6.74 -28.1 -1.69
## 9 avg_pct_met_a… frpm_percent 35 gp_r… -5.15 9.86 -24.5 14.2
## 10 avg_pct_met_a… frpm_percent 35 gp_r… -18.5 7.43 -33.1 -3.96
## # ℹ 14 more rows
## # ℹ 2 more variables: p_value <dbl>, significant <lgl>
# ---- Usage FRPM 35% ------------------------------------------------------
outcomes <- c(
"chronic_absenteeism",
"btb",
"avg_pct_met_above_ELA",
"avg_pct_met_above_Math",
"avg_pct_not_met_ELA",
"avg_pct_not_met_Math",
"avg_scale_score_ELA",
"avg_scale_score_Math"
)
frpm_gp_rdd_75_comparison_table <- run_rdd_comparisons(
df = df_clean,
running_variable = "frpm_percent",
cutoff = 75,
outcomes = outcomes,
covariate_names = c("Charter", "DASS", "pct_hispanic", "pct_white", "pct_two_or_more"),
local_bw = 5
)
print(frpm_gp_rdd_75_comparison_table)
## # A tibble: 24 × 10
## outcome running_variable cutoff model tau se ci_lower ci_upper
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 chronic_absen… frpm_percent 75 gp_r… -0.0740 6.06 -12.0 11.8
## 2 chronic_absen… frpm_percent 75 gp_r… -0.0740 6.06 -12.0 11.8
## 3 chronic_absen… frpm_percent 75 gp_r… -1.83 8.81 -19.1 15.4
## 4 btb frpm_percent 75 gp_r… -0.159 0.208 -0.567 0.249
## 5 btb frpm_percent 75 gp_r… -0.159 0.208 -0.567 0.249
## 6 btb frpm_percent 75 gp_r… -0.0461 0.303 -0.639 0.547
## 7 avg_pct_met_a… frpm_percent 75 gp_r… 1.10 6.08 -10.8 13.0
## 8 avg_pct_met_a… frpm_percent 75 gp_r… 1.10 6.08 -10.8 13.0
## 9 avg_pct_met_a… frpm_percent 75 gp_r… 3.17 11.6 -19.6 26.0
## 10 avg_pct_met_a… frpm_percent 75 gp_r… -0.325 5.97 -12.0 11.4
## # ℹ 14 more rows
## # ℹ 2 more variables: p_value <dbl>, significant <lgl>
# ---- Usage FRPM 35% ------------------------------------------------------
outcomes <- c(
"chronic_absenteeism",
"btb",
"avg_pct_met_above_ELA",
"avg_pct_met_above_Math",
"avg_pct_not_met_ELA",
"avg_pct_not_met_Math",
"avg_scale_score_ELA",
"avg_scale_score_Math"
)
upp_gp_rdd_40_comparison_table <- run_rdd_comparisons(
df = df_clean,
running_variable = "undup_pct",
cutoff = 40,
outcomes = outcomes,
covariate_names = c("Charter", "DASS", "pct_hispanic", "pct_white", "pct_two_or_more"),
local_bw = 5
)
print(upp_gp_rdd_40_comparison_table)
## # A tibble: 24 × 10
## outcome running_variable cutoff model tau se ci_lower ci_upper
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 chronic_absen… undup_pct 40 gp_r… 1.43 6.90 -12.1 15.0
## 2 chronic_absen… undup_pct 40 gp_r… 1.43 6.90 -12.1 15.0
## 3 chronic_absen… undup_pct 40 gp_r… -1.89 9.64 -20.8 17.0
## 4 btb undup_pct 40 gp_r… 0.276 0.247 -0.208 0.759
## 5 btb undup_pct 40 gp_r… 0.276 0.247 -0.208 0.759
## 6 btb undup_pct 40 gp_r… -0.0789 0.326 -0.717 0.559
## 7 avg_pct_met_a… undup_pct 40 gp_r… -7.41 7.36 -21.8 7.01
## 8 avg_pct_met_a… undup_pct 40 gp_r… -7.41 7.36 -21.8 7.01
## 9 avg_pct_met_a… undup_pct 40 gp_r… -2.07 10.8 -23.2 19.0
## 10 avg_pct_met_a… undup_pct 40 gp_r… -6.34 7.60 -21.2 8.55
## # ℹ 14 more rows
## # ℹ 2 more variables: p_value <dbl>, significant <lgl>
# ---- Usage Unduplicate Peer Percent 75% ------------------------------------------------------
outcomes <- c(
"chronic_absenteeism",
"btb",
"avg_pct_met_above_ELA",
"avg_pct_met_above_Math",
"avg_pct_not_met_ELA",
"avg_pct_not_met_Math",
"avg_scale_score_ELA",
"avg_scale_score_Math"
)
upp_gp_rdd_75_comparison_table <- run_rdd_comparisons(
df = df_clean,
running_variable = "undup_pct",
cutoff = 75,
outcomes = outcomes,
covariate_names = c("Charter", "DASS", "pct_hispanic", "pct_white", "pct_two_or_more"),
local_bw = 5
)
print(upp_gp_rdd_75_comparison_table)
## # A tibble: 24 × 10
## outcome running_variable cutoff model tau se ci_lower ci_upper p_value
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 chroni… undup_pct 75 gp_r… -1.67 6.68 -14.8 11.4 0.803
## 2 chroni… undup_pct 75 gp_r… -1.67 6.68 -14.8 11.4 0.803
## 3 chroni… undup_pct 75 gp_r… -4.72 6.93 -18.3 8.86 0.496
## 4 btb undup_pct 75 gp_r… 0.0944 0.225 -0.347 0.535 0.675
## 5 btb undup_pct 75 gp_r… 0.0944 0.225 -0.347 0.535 0.675
## 6 btb undup_pct 75 gp_r… 0.227 0.235 -0.233 0.687 0.333
## 7 avg_pc… undup_pct 75 gp_r… -5.28 6.62 -18.3 7.70 0.425
## 8 avg_pc… undup_pct 75 gp_r… -5.28 6.62 -18.3 7.70 0.425
## 9 avg_pc… undup_pct 75 gp_r… -3.67 7.76 -18.9 11.5 0.636
## 10 avg_pc… undup_pct 75 gp_r… -1.11 6.38 -13.6 11.4 0.862
## # ℹ 14 more rows
## # ℹ 1 more variable: significant <lgl>
Result comparison
classical_rdd_summary_tbl <- classical_rdd_summary_tbl %>% mutate(model="rdd", significant=p_value<0.05)
complete_results_table_summary <- classical_rdd_summary_tbl %>%
rbind(frpm_gp_rdd_35_comparison_table) %>%
rbind(frpm_gp_rdd_75_comparison_table) %>%
rbind(upp_gp_rdd_40_comparison_table) %>%
rbind(upp_gp_rdd_75_comparison_table) %>%
arrange(outcome, running_variable, cutoff, desc(model))
complete_results_table_summary
## # A tibble: 128 × 10
## outcome running_variable cutoff tau se ci_lower ci_upper p_value model
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 avg_pct… frpm_percent 35 -12.9 6.00 -24.6 -1.09 0.0323 rdd
## 2 avg_pct… frpm_percent 35 -14.9 6.74 -28.1 -1.69 0.0271 gp_r…
## 3 avg_pct… frpm_percent 35 -5.15 9.86 -24.5 14.2 0.601 gp_r…
## 4 avg_pct… frpm_percent 35 -14.9 6.74 -28.1 -1.69 0.0271 gp_r…
## 5 avg_pct… frpm_percent 75 8.10 4.95 -1.61 17.8 0.102 rdd
## 6 avg_pct… frpm_percent 75 1.10 6.08 -10.8 13.0 0.856 gp_r…
## 7 avg_pct… frpm_percent 75 3.17 11.6 -19.6 26.0 0.785 gp_r…
## 8 avg_pct… frpm_percent 75 1.10 6.08 -10.8 13.0 0.856 gp_r…
## 9 avg_pct… undup_pct 40 -7.41 7.36 -21.8 7.01 0.314 gp_r…
## 10 avg_pct… undup_pct 40 -2.07 10.8 -23.2 19.0 0.848 gp_r…
## # ℹ 118 more rows
## # ℹ 1 more variable: significant <lgl>
view(complete_results_table_summary)
# 2 RVs
# 2 cutoffs
# 8 outcomes
# 4 models
# 128 total rows